#
# questionsPerYear.R
#
# Create a table noting the number of questions analyzed
# per year for a specific analysis.
#
# Hill, Seth J. and Chris Tausanovitch. "A Disconnect in Representation? Comparison of Trends in Congressional and Public Polarization."
#

library(data.table)
library(bit64)

# Load the saved model output. The rest of the script reads `jdata`, which is
# presumably restored by this file (confirm against the script that saved it).
load("sample_output/jagsoutFull25kchain1Norm.RData")
cat("Called in data at", paste(Sys.time()), "\n")
flush.console()

# Lookup table built from jdata$codes: maps yearparty to year and party.
yp <- data.table(as.data.frame(jdata$codes, stringsAsFactors = FALSE))
# Convert both code columns to numeric, going through character first so that
# a factor column would yield its labels rather than its internal level codes.
code.cols <- c("yearparty", "year")
yp[, (code.cols) := lapply(.SD, function(v) as.numeric(as.character(v))),
   .SDcols = code.cols]

# Responses from jdata$y as a data.table.
responses <- data.table(jdata$y)
# Tag every response row with its yearparty code, use that code to pull in the
# remaining columns of the yp lookup (year and party), then discard the code.
responses[, yearparty := jdata$yearparty]
responses <- merge(responses, yp, by = "yearparty", all.x = TRUE)
responses[, yearparty := NULL]
# Drop 1948, 1952 and 1954 with no items/no glue. The filter keeps every row
# whose year is not below 1955.
cat("Dropping records for cases in 1948, 1952, and 1954...\n")
responses <- responses[!(year < 1955), ]
# Columns to count: every column whose name does not contain "year" or "pid3".
# NOTE(review): the pattern is unanchored, so an item whose name merely
# contains "year" would also be excluded -- confirm no such item exists.
count.vars <- grep("year|pid3", names(responses), value = TRUE, invert = TRUE)

# Number of non-missing entries in a vector.
n.answered <- function(x) sum(!is.na(x))

# For every column in count.vars, count non-missing cases within each
# pid3-by-year cell ...
resp.counts.yp <- responses[, lapply(.SD, n.answered),
                            by = c("pid3", "year"), .SDcols = count.vars]
# ... and within each year, pooling over pid3.
resp.counts.yr <- responses[, lapply(.SD, n.answered),
                            by = "year", .SDcols = count.vars]

# Per year: how many of the counted columns have at least one non-missing
# answer in that year.
counts.by.year <- data.table(
  Year = resp.counts.yr[["year"]],
  "Number questions" = rowSums(resp.counts.yr[, count.vars, with = FALSE] > 0)
)
# Per pid3-by-year cell: total non-missing answers summed over all counted
# columns.
counts.by.py <- data.table(
  Year = resp.counts.yp[["year"]],
  pid3 = resp.counts.yp[["pid3"]],
  count = rowSums(resp.counts.yp[, count.vars, with = FALSE])
)
# Attach one response-count column per pid3 code, in table order
# (Democrat, Republican, Independent). Each pass left-merges that code's
# counts on Year, so a year without rows for the code gets NA, and then
# renames the freshly merged "count" column to the party label.
party.labels <- c(D = "Democrat", R = "Republican", I = "Independent")
for (party.code in names(party.labels)) {
  party.counts <- counts.by.py[pid3 == party.code, c("Year", "count"), with = FALSE]
  counts.by.year <- merge(counts.by.year, party.counts,
                          by = "Year", all.x = TRUE, all.y = FALSE)
  setnames(counts.by.year, "count", party.labels[[party.code]])
}

# Write to tex. The first line of the table is the header row: the column
# names joined by " & " and terminated with a LaTeX row break.
header.cells <- paste(names(counts.by.year), collapse = " & ")
out <- paste(header.cells, "\\\\")
# Format one table row as a LaTeX line.
#
# x: a vector whose first element is the row label and whose remaining
#    elements are the cell values (numeric, or character strings of digits).
# Returns a single string: the label, then each value with "," as the
# thousands separator, joined by " & " and terminated with "\\".
makeRow <- function(x) {
  label <- x[1]
  # x[-1] rather than x[2:length(x)]: for a length-1 input 2:1 counts
  # downwards and would produce c(NA, x[1]) as bogus cells.
  values <- x[-1]
  if (length(values) == 0) {
    # Label-only row: nothing to format.
    return(paste(label, "\\\\"))
  }
  # scientific = FALSE is forwarded to format() for numeric input; without it
  # round values such as 100000 are rendered as "1e+05" and get no separators.
  cells <- prettyNum(values, big.mark = ",", scientific = FALSE)
  paste(label, "&", paste(cells, collapse = " & "), "\\\\")
}
# One LaTeX row per year. apply() converts the table to a matrix, so makeRow
# receives each row as a plain vector (Year first, then the counts).
out <- c(out, apply(counts.by.year, 1, makeRow))
# Add totals.
out <- c(out, "\\hline")
# Total questions = every column of resp.counts.yr except `year`.
# The party columns come from left merges (all.x = TRUE), so a year with no
# rows for a party holds NA; na.rm = TRUE keeps one such gap from turning the
# whole column total into NA.
totals <- c(ncol(resp.counts.yr) - 1,
            counts.by.year[, sum(Democrat, na.rm = TRUE)],
            counts.by.year[, sum(Republican, na.rm = TRUE)],
            counts.by.year[, sum(Independent, na.rm = TRUE)])
# Render the totals as plain digit strings before they are combined with the
# "Total" label: c() would otherwise coerce them with as.character(), which
# writes round values such as 100000 as "1e+05".
total.cells <- format(totals, scientific = FALSE, trim = TRUE)
out <- c(out, makeRow(c("Total", total.cells)))
# Echo the table to the console (terminated with a newline so later output
# starts on its own line) and write it to the .tex file.
tex <- paste(out, collapse = "\n")
cat(tex, "\n", sep = "")
write(tex, file = "questionsResponsesPerYear.tex")
